# -*- coding: utf-8 -*-

import numpy  as np
import pandas as pd
import os,time,sort
from swrc2 import *

import matplotlib.pyplot as plt

from scipy.stats import pearsonr      #calculate r
from sklearn.metrics import r2_score  #calculate R^2

#dirlist = os.listdir('data')
#dir_file = sort.sort_sl(dirlist)[1]
def get_num_of_observed_point():
    # return the number of observations
    num_of_observed_point = []
    for code in texture.code:
        num = ht[ht.code==code].shape[0]
        if num == 1 and pd.isnull(ht[ht.code==code].h).values[0]:
            num = 0
        num_of_observed_point.append(num)
    return pd.DataFrame(num_of_observed_point,columns=['num'])

def get_texture_data():
#    particle_size=pd.read_excel('data.xlsx',sheet_name='size',header=None)
    tex = {"code":[],"clay":[],"silt":[],"sand":[]}
    tex = pd.DataFrame(tex)
    tex.code = BD.code  # outer cited
    for code in tex.code.values.tolist():
        psda = size[size.code==code]
        s2, s2_50, s50 = np.nan, np.nan, np.nan
        if 2 in list(psda['size']) and 50 in list(psda['size']):
            s2    = round(float(psda[psda['size']==2].percent),3)
            s2_50 = round(float(psda[psda['size']==50].percent)-s2,3)
            s50   = round(1-s2-s2_50,3)
        elif not (2 in list(psda['size']) and 50 in list(psda['size'])) and \
             np.sum(psda['size']<=2)>=1 and np.sum(psda['size']>=50)>=1:
            for i in range(psda.shape[0]-1):
                if psda.iloc[i,1]<=2 and psda.iloc[i+1,1]>=2:
                    k     = float((psda.iloc[i+1,2]-psda.iloc[i,2])/(psda.iloc[i+1,1]-psda.iloc[i,1]))
                    s2    = round(psda.iloc[i,2]+k*(2-psda.iloc[i,1]),3)
                if psda.iloc[i,1]<50 and psda.iloc[i+1,1]>=50:
                    k     = float((psda.iloc[i+1,2]-psda.iloc[i,2])/(psda.iloc[i+1,1]-psda.iloc[i,1]))
                    s2_50 = round(psda.iloc[i,2]+k*(50-psda.iloc[i,1])-s2,3)
            s50 = round(1-s2-s2_50,3)
        tex.loc[tex[tex.code==code].index.tolist(),['clay']] = s2
        tex.loc[tex[tex.code==code].index.tolist(),['silt']] = s2_50
        tex.loc[tex[tex.code==code].index.tolist(),['sand']] = s50
    tex = tex[['code','clay','silt','sand']]  # change order of cols
    return tex


# load fitting data
fitted_new = pd.read_csv('data2/'+'five_new_model_fitted.csv')
fitted_old = pd.read_csv('data2/'+'three_trad_fitted_best.csv')

# load data
data   = pd.read_excel('data2/'+'data.xlsx',None,header=None)
ht = data['ht'].copy()
ht.columns=['code', 'h', 't']
size = data['size'].copy()
size.columns=['code', 'size', 'percent']
texture = data['texture'].copy()
texture.columns=['code', 'texture']
Ks = data['Ks'].copy()
Ks.columns=['code', 'Ks']
BD = data['BD'].copy()
BD.columns=['code', 'BD']
BD.loc[BD.BD=='No data','BD'] = np.nan   # use "nan" to represent "No data"
BD.BD = np.array(BD.BD.tolist())         # to arr
OM = data['OM'].copy()
OM.columns=['code', 'OM']
OM.loc[OM.OM=='No data','OM'] = np.nan   # use "nan" to represent "No data"
OM.OM = np.array(OM.OM.tolist())         # to arr

tex = get_texture_data()

# data concat
da = pd.concat([texture.code,
                fitted_old.iloc[:,1:16],
                fitted_new.iloc[:,16:],
                get_num_of_observed_point(),
                Ks.Ks,
                BD.BD,
                OM.OM,
                texture.texture,
                tex.clay,
                tex.silt,
                tex.sand],axis=1)

def model_ordinal_transform(case):
    # match model names 
    models = ['BC', 'VG', 'KO', 'HP', 'AT', 'GD', 'SG', 'CA']
    #ordinal=[ 0     1     2     3     4     5     6     7]
    if type(case) is str:
        return models.index(case)
    elif type(case) is int:
        return models[case]

def para_of_sample(code, model):
    # return fitted para
    n = model_ordinal_transform(model)
    return np.array(da[da.code==code].iloc[:,n*5+1:n*5+5].values.tolist()[0])

def rmse_of_sample(code, model):
    #return rmse
    n = model_ordinal_transform(model)
    return float(da[da.code==code].iloc[:,n*5+5])

db = da.copy()  #save SLCAP optimized data

from scipy.optimize import minimize

models = ['BC', 'VG', 'KO', 'HP', 'AT', 'GD', 'SG', 'CA']
bnds = [((0,0.2),(0.2,0.8),(1,20000),(0.01,10)), #BC
        ((0,0.2),(0.2,0.8),(0.001,1),(1,10)   ), #VG
        ((0,0.2),(0.2,0.8),(1,50000),(0.01,10)), #KO
        ((0,0.2),(0.2,0.8),(0.01,20),(0.01,20)), #HP
        ((0,0.2),(0.2,0.8),(0.01,20),(0.01,20)), #AT
        ((0,0.2),(0.2,0.8),(0.01,20),(0.01,20)), #GD
        ((0,0.2),(0.2,0.8),(0.01,20),(0.01,20)), #SG
        ((0,0.2),(0.2,0.8),(0.01,20),(0.01,20))] #CA

def rmse(a,b):
    a = np.array(a)
    b = np.array(b)
    c = np.sqrt(np.sum((a-b)**2)/a.shape[0])
    return c

def BCobj(para):
    return rmse(BC(h,para),t)
def VGobj(para):
    return rmse(VG(h,para),t)
def KOobj(para):
    return rmse(KO(h,para),t)
def HPobj(para):
    return rmse(HP(h,para),t)
def ATobj(para):
    return rmse(AT(h,para),t)
def GDobj(para):
    return rmse(GD(h,para),t)
def SGobj(para):
    return rmse(SG(h,para),t)
def CAobj(para):
    return rmse(CA(h,para),t)

# nonlinear least square
# (SLCAP       time 3m05s)
# (Nelder-Mead time 23m26s)
print(time.strftime("%Y-%m-%d %H:%M:%S"))
for code in db.code:
    if code in fitted_new.code.values.tolist():
        print('%4s'%db[db.code==code].index.values.tolist()[0],end='')
        h = ht[ht.code==code].h
        t = ht[ht.code==code].t        
        for mode in models:
            modn = models.index(mode)
            para = para_of_sample(code,mode)
            perf = rmse_of_sample(code,mode) #perf->rmse
            sol = eval("minimize("+ mode +"obj, para, method='SLCAP', bounds=bnds[modn])")
#            sol = eval("minimize("+ mode +"obj, para, method='Nelder-Mead')")
            #eg: sol = minimize(BCobj, para, method='SLCAP', bounds=bnds[modn])
            if perf > sol.fun:
                db.iloc[db[db.code==code].index,modn*5+1:modn*5+5] = sol.x    # save para
                db.iloc[db[db.code==code].index,modn*5+5] = sol.fun           # save rmse
print()
print(time.strftime("%Y-%m-%d %H:%M:%S"))

dc = da[fitted_new.code!=0]
dd = db[fitted_new.code!=0]

d1 = dc.describe().loc[['count','mean','std','min','max'],:]  #save the initial data
d2 = dd.describe().loc[['count','mean','std','min','max'],:]  #save the optimized data


dc.to_csv('GA_8_curves_ES_up.csv',index=False)
dd.to_csv('best_8_curves_ES_up.csv',index=False)




for mode in models:
    print(mode,eval('sum(dc.rmse_'+mode+'>dd.rmse_'+mode+')'))

dc[dc.rmse_SG<=dd.rmse_SG]

a = dc[dc.rmse_AT<=dd.rmse_AT]
b = dd[dc.rmse_AT<=dd.rmse_AT]

a0 = []
for i in sorted(dc.num.value_counts().index.tolist()):
    a0.append([i,dc[dc.num==i].rmse_BC.mean()])

def plot_code(code):
    h = ht[ht.code==code].h
    t = ht[ht.code==code].t
    plt.scatter(np.log10(h),t,label='Obs')
    plt.xticks([0,1,2,3,4,5],['1','10','100','1000','10000','100000'])
#    plt.plot(lo_curve,np.log10(h),label='lo')
    plt.title('code='+str(code),
              {'size':18},loc='left')
    plt.ylabel(r'water content ${\theta}$'+r' (cm$^3$/cm$^3$)',{'size':16})
    plt.xlabel('pressure head $h$ (cm)',{'size':16})
    plt.xlim(-0.2,np.log10(np.max(h))+0.3)
    plt.ylim(0,0.6)